
from pprint import pprint
import time
import requests
import json
from proxy_helper import *

def get_page(page, proxies):
    """Fetch one page of the certificate-wall listing from qichamao.com.

    Args:
        page: Page number to request (sent as a string form field).
        proxies: requests-style proxies dict, or None for a direct connection.

    Returns:
        The response body decoded as UTF-8 on HTTP 200, otherwise ''.

    Raises:
        requests.RequestException: on connection errors or timeout
        (the caller treats any exception as an empty response).
    """
    url = 'https://www.qichamao.com/cert-wall'

    # Spoof a desktop-browser User-Agent so the site serves the normal page.
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36"
    }

    form_data = {"page": str(page), "pagesize": "9"}

    # requests treats proxies=None as "no proxy", so a single call covers
    # both cases. Always pass a timeout: requests has NO default timeout,
    # and a dead proxy would otherwise hang the crawl forever.
    response = requests.post(
        url,
        headers=headers,
        data=form_data,
        proxies=proxies,
        timeout=10,
    )
    if response.status_code == 200:
        # .content is bytes; decode explicitly instead of trusting the
        # server-advertised charset.
        return response.content.decode('utf-8')
    return ''


def parse_page(html):
    """Parse a cert-wall JSON response and print each entry's company name.

    Args:
        html: Raw JSON response body containing a 'dataList' array.
    """
    entries = json.loads(html)['dataList']
    for entry in entries:
        print(entry['CompanyName'])


def main():
    """Crawl cert-wall pages 2..299, switching to a proxy after a failure.

    A failed page is retried with a fresh proxy instead of being skipped.
    (The original `for page in range(...)` loop did `page -= 1` on failure,
    which has no effect: the range iterator reassigns `page` on the next
    iteration, so failed pages were silently dropped.)
    """
    proxies = None
    page = 2
    while page < 300:
        print(page)

        try:
            html = get_page(page, proxies)
        except Exception:
            # Network/proxy errors are expected here; treat the page as
            # an empty response and fall through to the retry path.
            html = ''

        # A good response is JSON starting with isSuccess=true; anything
        # else means the request failed or we were blocked.
        if '{"isSuccess":true' not in html:
            time.sleep(1)
            # Rotate to a (new) proxy IP and retry the SAME page.
            proxies = get_proxies()
            print('使用代理ip', proxies)
            continue

        # print(html)
        parse_page(html)
        page += 1


# Run the crawler only when executed as a script, not on import.
if __name__ == '__main__':
    main()